import HTMLParser
import urllib
import re
import string


# Output file; the script opens it in append mode and adds one line per value.
pathOut = "ixp_members.txt"
# Address of the HTML page that is downloaded and parsed.
urlString = "http://www.twix.net/member_e.html"
# Distinct numeric table-cell strings collected by the parser, in the order
# they are first seen.
urlText = list()


class Parse28 (HTMLParser.HTMLParser):
    """Collect the distinct numeric ``<td>`` cell values of an HTML page.

    Feed the page text with ``feed()``. Every text chunk seen while a
    ``<td>`` element is open and that consists only of digits (optionally
    followed by a single space) is stripped of trailing whitespace and
    appended to the module-level ``urlText`` list, unless that value is
    already in the list.
    """

    # Digits optionally followed by one space. Note that ``$`` also matches
    # just before a trailing newline; the later rstrip() removes it.
    _NUMERIC_CELL = re.compile(r"^\d+ ?$")

    # True while the parser is between a <td> start tag and the next </td>.
    check = False

    def handle_starttag(self, tag, attrs):
        """Start inspecting text as soon as a ``<td>`` tag opens."""
        if tag == "td":
            self.check = True

    def handle_endtag(self, tag):
        """Stop inspecting text when a ``</td>`` tag is reached."""
        if tag == "td":
            self.check = False

    def handle_data(self, data):
        """Record ``data`` in ``urlText`` if it is a new numeric cell value."""
        if not self.check or not self._NUMERIC_CELL.match(data):
            return
        # str.rstrip() replaces the deprecated string.rstrip() function.
        value = data.rstrip()
        if value not in urlText:
            urlText.append(value)


lparser = Parse28()

# Download the page; close the connection even if reading it fails.
response = urllib.urlopen(urlString)
try:
    page = response.read()
finally:
    response.close()

# An <embed> tag (line 48 of the page) must be removed from the HTML,
# otherwise the parser stops.
page = re.sub("<embed[^>]*></embed>", "", page)
lparser.feed(page)

# Append one "28 <value>" line per collected value; the with-block closes
# the file even if a write raises.
with open(pathOut, "a") as fileOut:
    for item in urlText:
        fileOut.write("28 %s\n" % item)